Load required packages.

# Attach the packages this notebook depends on.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the notebook fail later in a less obvious place.
library(devtools)
mypackages <- c("ggplot2", "tidyverse")
# invisible() keeps the list returned by lapply() out of the notebook output.
invisible(lapply(mypackages, library, character.only = TRUE))
library(SomaDataIO)

Read in data and check format.

# Path to the SomaScan ADAT export. Directory and file name are joined with
# file.path() so the long file name sits on its own line; the resulting string
# is "~/Desktop/DRG/Proteomics/<file name>".
file <- file.path(
  "~/Desktop/DRG/Proteomics",
  "10_28_22_Merged_With_Legend_SS-2216784_v4.1_EDTAPlasma.hybNorm.medNormInt.plateScale.calibration.anmlQC.qcCheck.anmlSMP.adat"
)
# Read the ADAT, then report whether the result is a soma_adat object.
adat <- read_adat(file)
is.soma_adat(adat)
[1] TRUE
# Print the soma_adat summary (row/column counts, clinical vs. feature columns,
# and the available column-meta fields).
adat
══ SomaScan Data ═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
     Attributes intact    ✓
     Rows                 72
     Columns              7640
     Clinical Data        44
     Features             7596
── Column Meta ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
ℹ SeqId, SeqIdVersion, SomaId, TargetFullName, Target, UniProt, EntrezGeneID, EntrezGeneSymbol, Organism, Units, Type, Dilution, PlateScale_Reference,
ℹ CalReference, Cal_PLT14022, ColCheck, CalQcRatio_PLT14022_200170, QcReference_200170, Dilution2
── Tibble ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════

The first part of this notebook is assessing other collagen entities.

Subset the data frame to only the desired columns and rename them to Entrez gene symbols so the proteins are easier to tell apart.

# SomaScan analyte column names for the collagen panel.
# Matched by position with collagen_names, so the order here matters.
collagen_list <- c(
  "seq.11140.56", "seq.13484.69", "seq.15466.30", "seq.6631.17", "seq.22047.46",
  "seq.11150.3", "seq.16828.8", "seq.4807.13", "seq.15653.9", "seq.6570.1",
  "seq.8974.172", "seq.8804.39", "seq.4543.65", "seq.7006.4", "seq.10702.1",
  "seq.16753.46", "seq.11278.4", "seq.20175.17", "seq.11196.31", "seq.10511.10",
  "seq.11155.16", "seq.15467.10", "seq.6236.51", "seq.15569.15", "seq.18880.81"
)

# Display labels for the columns named in collagen_list, matched by position.
# NOTE(review): the .1/.2 suffixes presumably separate several analytes that
# share one gene symbol - confirm against the ADAT annotation.
collagen_names <- c(
  "COL1A1.1", "COL1A1.2", "COL9A1.1", "COL9A1.2", "COL5A1",
  "COL6A1.1", "COL6A1.2", "COL8A1", "COL10A1", "COL13A1",
  "COL15A1", "COL20A1", "COL23A1", "COL25A1", "COL28A1",
  "COL6A2", "COL11A2", "COL9A3", "COL6A3.1", "COL6A3.2",
  "COL6A5", "CTHRC1.1", "CTHRC1.2", "COL2A1", "COL3A1"
)

# Keep the cohort label plus the collagen analytes, then replace the analyte
# column names with the labels in collagen_names.
# The two vectors are matched by position, so a length mismatch must stop here
# instead of producing wrongly labelled columns.
stopifnot(length(collagen_list) == length(collagen_names))
# rename_with() + all_of() replaces the superseded rename_at(vars(...)) form and
# makes it explicit that collagen_list is an external character vector.
sub_adat <- adat %>%
  select("Cohort", all_of(collagen_list)) %>%
  rename_with(~ collagen_names, .cols = all_of(collagen_list))
# After renaming, the printed summary lists these columns as clinical data
# rather than features.
sub_adat
══ SomaScan Data ═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
     Attributes intact    ✓
     Rows                 72
     Columns              26
     Clinical Data        26
     Features             0
── Column Meta ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
ℹ SeqId, SeqIdVersion, SomaId, TargetFullName, Target, UniProt, EntrezGeneID, EntrezGeneSymbol, Organism, Units, Type, Dilution, PlateScale_Reference,
ℹ CalReference, Cal_PLT14022, ColCheck, CalQcRatio_PLT14022_200170, QcReference_200170, Dilution2
── Tibble ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════

Perform a Welch two-sample t-test between the Control and Dupuytren cohorts for each protein. The reported p-values are not adjusted for multiple comparisons.

# Welch two-sample t-test (t.test() default) of every numeric column against
# the cohort label, collecting one p-value per protein.
# The p-value is extracted with map_dbl() instead of broom::tidy(): tidy() on a
# bare numeric is deprecated and emitted one warning per column.
Cohort <- sub_adat$Cohort
res <- sub_adat %>%
  select(where(is.numeric)) %>%
  map_dbl(~ t.test(.x ~ Cohort)$p.value) %>%
  # Column is named `x` so `res` keeps the same shape as before.
  enframe(name = "Protein", value = "x")
# Display the table with a descriptive column name; `res` itself is unchanged.
res %>% rename("p-value" = "x")

Loop over df and visualize data.

# Draw one box plot per renamed collagen column, split by cohort.
# aes_string() is deprecated in ggplot2; the .data pronoun looks the column up
# by its string name instead. labs() keeps the column name as the y-axis title.
for (protein in collagen_names) {
  cohort_boxplot <- ggplot(sub_adat, aes(x = Cohort, y = .data[[protein]])) +
    geom_boxplot() +
    labs(y = protein)
  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(cohort_boxplot)
}

The final part of this notebook assesses other collagen-related and collagen-modifying proteins (along with some additional collagen proteins).

Subset the data frame to only the desired columns and rename them to Entrez gene symbols so the proteins are easier to tell apart.

# SomaScan analyte column names for the collagen-related / modifying panel.
# Matched by position with protein_names, so the order here matters.
protein_list <- c(
  "seq.10479.18", "seq.10511.10", "seq.10612.18", "seq.10800.15",
  "seq.11150.3", "seq.11196.31", "seq.11237.49", "seq.11348.132",
  "seq.11645.9", "seq.13535.2", "seq.13950.9", "seq.15466.30",
  "seq.15569.15", "seq.16753.46", "seq.16828.8", "seq.18875.125",
  "seq.18880.81", "seq.20175.17", "seq.2201.17", "seq.22047.46",
  "seq.2579.17", "seq.2788.55", "seq.2789.26", "seq.2954.56",
  "seq.3348.49", "seq.4160.49", "seq.4496.60", "seq.4543.65",
  "seq.4924.32", "seq.4925.54", "seq.5002.76", "seq.5638.23",
  "seq.6273.58", "seq.6383.90", "seq.6570.1", "seq.6631.17",
  "seq.8475.15", "seq.8479.4", "seq.8845.2", "seq.9172.69"
)

# Display labels for the columns named in protein_list, matched by position.
# NOTE(review): the _1/_2 suffixes presumably separate several analytes that
# share one gene symbol - confirm against the ADAT annotation.
protein_names <- c(
  "MMP10_1", "COL6A3_1", "PLOD3", "SERPINH1", "COL6A1_1",
  "COL6A3_2", "PCOLCE", "P4HA2", "P4HA1", "CERT1_1",
  "CERT1_2", "COL9A1_1", "COL2A1_1", "COL6A2", "COL6A1_2",
  "COL2A1_2", "COL3A1", "COL9A3", "COL18A1", "COL5A1",
  "MMP9", "MMP3", "MMP7_1", "MMP8_1", "BMP1",
  "MMP2", "MMP12", "COL23A1", "MMP1", "MMP13",
  "MMP14", "COLGALT1", "P3H1", "TLL1", "COL13A1",
  "COL9A1_2", "MMP7_2", "MMP10_2", "ADAMTS3", "MMP8_2"
)

# Keep the cohort label plus the selected analytes, then replace the analyte
# column names with the labels in protein_names.
# The two vectors are matched by position, so a length mismatch must stop here
# instead of producing wrongly labelled columns.
stopifnot(length(protein_list) == length(protein_names))
# rename_with() + all_of() replaces the superseded rename_at(vars(...)) form and
# makes it explicit that protein_list is an external character vector.
sub_adat2 <- adat %>%
  select("Cohort", all_of(protein_list)) %>%
  rename_with(~ protein_names, .cols = all_of(protein_list))
# After renaming, the printed summary lists these columns as clinical data
# rather than features.
sub_adat2
══ SomaScan Data ═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════
     Attributes intact    ✓
     Rows                 72
     Columns              41
     Clinical Data        41
     Features             0
── Column Meta ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
ℹ SeqId, SeqIdVersion, SomaId, TargetFullName, Target, UniProt, EntrezGeneID, EntrezGeneSymbol, Organism, Units, Type, Dilution, PlateScale_Reference,
ℹ CalReference, Cal_PLT14022, ColCheck, CalQcRatio_PLT14022_200170, QcReference_200170, Dilution2
── Tibble ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════

Perform a Welch two-sample t-test between the Control and Dupuytren cohorts for each protein. The reported p-values are not adjusted for multiple comparisons.

# Welch two-sample t-test (t.test() default) of every numeric column against
# the cohort label, collecting one p-value per protein.
# The p-value is extracted with map_dbl() instead of broom::tidy(): tidy() on a
# bare numeric is deprecated and emitted one warning per column.
Cohort <- sub_adat2$Cohort
res <- sub_adat2 %>%
  select(where(is.numeric)) %>%
  map_dbl(~ t.test(.x ~ Cohort)$p.value) %>%
  # Column is named `x` so `res` keeps the same shape as before.
  enframe(name = "Protein", value = "x")
# Display the table with a descriptive column name; `res` itself is unchanged.
res %>% rename("p-value" = "x")

Loop over df and visualize data.

# Draw one box plot per renamed protein column, split by cohort.
# aes_string() is deprecated in ggplot2; the .data pronoun looks the column up
# by its string name instead. labs() keeps the column name as the y-axis title.
for (protein in protein_names) {
  cohort_boxplot <- ggplot(sub_adat2, aes(x = Cohort, y = .data[[protein]])) +
    geom_boxplot() +
    labs(y = protein)
  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(cohort_boxplot)
}

LS0tCnRpdGxlOiAiQ29sbGFnZW4gYW5kIHJlbGF0ZWQgcHJvdGVpbnMgbGlzdCBmb3IgTm9yZGljIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpMb2FkIHJlcXVpcmVkIHBhY2thZ2VzLgpgYGB7cn0KbGlicmFyeShkZXZ0b29scykKbXlwYWNrYWdlcyA8LSBjKCJnZ3Bsb3QyIiwgInRpZHl2ZXJzZSIpCmxhcHBseShteXBhY2thZ2VzLCByZXF1aXJlLCBjaGFyYWN0ZXIub25seSA9IFRSVUUpCmxpYnJhcnkoU29tYURhdGFJTykKYGBgCgpSZWFkIGluIGRhdGEgYW5kIGNoZWNrIGZvcm1hdC4KYGBge3J9CmZpbGUgPC0gIn4vRGVza3RvcC9EUkcvUHJvdGVvbWljcy8xMF8yOF8yMl9NZXJnZWRfV2l0aF9MZWdlbmRfU1MtMjIxNjc4NF92NC4xX0VEVEFQbGFzbWEuaHliTm9ybS5tZWROb3JtSW50LnBsYXRlU2NhbGUuY2FsaWJyYXRpb24uYW5tbFFDLnFjQ2hlY2suYW5tbFNNUC5hZGF0IgphZGF0IDwtIHJlYWRfYWRhdChmaWxlKQppcy5zb21hX2FkYXQoYWRhdCkKYWRhdApgYGAKClRoZSBmaXJzdCBwYXJ0IG9mIHRoaXMgbm90ZWJvb2sgaXMgYXNzZXNzaW5nIG90aGVyIGNvbGxhZ2VuIGVudGl0aWVzLgoKU3Vic2V0IGRmIHRvIG9ubHkgZGVzaXJlZCBjb2x1bW5zIGFuZCByZW5hbWUgdG8gRW50cmV6IGdlbmUgSUQgZm9yIGVhc2Ugb2YgZGlzY3JpbWluYXRpb24uCmBgYHtyfQpjb2xsYWdlbl9saXN0IDwtIGMoJ3NlcS4xMTE0MC41NicsCidzZXEuMTM0ODQuNjknLAonc2VxLjE1NDY2LjMwJywKJ3NlcS42NjMxLjE3JywKJ3NlcS4yMjA0Ny40NicsCidzZXEuMTExNTAuMycsCidzZXEuMTY4MjguOCcsCidzZXEuNDgwNy4xMycsCidzZXEuMTU2NTMuOScsCidzZXEuNjU3MC4xJywKJ3NlcS44OTc0LjE3MicsCidzZXEuODgwNC4zOScsCidzZXEuNDU0My42NScsCidzZXEuNzAwNi40JywKJ3NlcS4xMDcwMi4xJywKJ3NlcS4xNjc1My40NicsCidzZXEuMTEyNzguNCcsCidzZXEuMjAxNzUuMTcnLAonc2VxLjExMTk2LjMxJywKJ3NlcS4xMDUxMS4xMCcsCidzZXEuMTExNTUuMTYnLAonc2VxLjE1NDY3LjEwJywKJ3NlcS42MjM2LjUxJywKJ3NlcS4xNTU2OS4xNScsCidzZXEuMTg4ODAuODEnKQoKY29sbGFnZW5fbmFtZXMgPC0gYygnQ09MMUExLjEnLAonQ09MMUExLjInLAonQ09MOUExLjEnLAonQ09MOUExLjInLAonQ09MNUExJywKJ0NPTDZBMS4xJywKJ0NPTDZBMS4yJywKJ0NPTDhBMScsCidDT0wxMEExJywKJ0NPTDEzQTEnLAonQ09MMTVBMScsCidDT0wyMEExJywKJ0NPTDIzQTEnLAonQ09MMjVBMScsCidDT0wyOEExJywKJ0NPTDZBMicsCidDT0wxMUEyJywKJ0NPTDlBMycsCidDT0w2QTMuMScsCidDT0w2QTMuMicsCidDT0w2QTUnLAonQ1RIUkMxLjEnLAonQ1RIUkMxLjInLAonQ09MMkExJywKJ0NPTDNBMScpCgpzdWJfYWRhdCA8LSBhZGF0ICU+JSBzZWxlY3QoJ0NvaG9ydCcsIGFsbF9vZihjb2xsYWdlbl9saXN0KSkKc3ViX2FkYXQgPC0gc3ViX2FkYXQgJT4lIHJlbmFtZV9hdCh2YXJzKGNvbGxhZ2VuX2xpc3QpLCB+IGNvbGxh
Z2VuX25hbWVzKQpzdWJfYWRhdApgYGAKUGVyZm9ybSB0LXRlc3RzIGJldHdlZW4gQ29udHJvbCBhbmQgRHVwdXl0cmVuIGNvbmRpdGlvbnMgZm9yIGVhY2ggcHJvdGVpbi4KYGBge3J9CkNvaG9ydCA8LSBzdWJfYWRhdCRDb2hvcnQKcmVzIDwtIHN1Yl9hZGF0ICU+JSAKICBzZWxlY3RfaWYoaXMubnVtZXJpYykgJT4lCiAgbWFwX2RmKH4gYnJvb206OnRpZHkodC50ZXN0KC4gfiBDb2hvcnQpJHAudmFsdWUpLCAuaWQgPSAnUHJvdGVpbicpCnJlcyAlPiUgcmVuYW1lKCdwLXZhbHVlJyA9ICd4JykKYGBgCgpMb29wIG92ZXIgZGYgYW5kIHZpc3VhbGl6ZSBkYXRhLgpgYGB7cn0KZm9yKGkgaW4gY29sbGFnZW5fbmFtZXMpIHsKICBwcmludChnZ3Bsb3Qoc3ViX2FkYXQsIGFlcyh4PUNvaG9ydCkpICsgZ2VvbV9ib3hwbG90KGFlc19zdHJpbmcoeT0gaSkpKQp9CmBgYAoKVGhlIGZpbmFsIHBhcnQgb2YgdGhpcyBub3RlYm9vayBpcyBhc3Nlc3Npbmcgb3RoZXIgY29sbGFnZW4gcmVsYXRlZCBhbmQgbW9kaWZ5aW5nIHByb3RlaW5zIChhbW9uZyBzb21lIG90aGVyIGNvbGxhZ2VuIHByb3RlaW5zIGFzIHdlbGwpLgoKU3Vic2V0IGRmIHRvIG9ubHkgZGVzaXJlZCBjb2x1bW5zIGFuZCByZW5hbWUgdG8gRW50cmV6IGdlbmUgSUQgZm9yIGVhc2Ugb2YgZGlzY3JpbWluYXRpb24uCmBgYHtyfQpwcm90ZWluX2xpc3QgPC0gYygic2VxLjEwNDc5LjE4IiwgInNlcS4xMDUxMS4xMCIsInNlcS4xMDYxMi4xOCIsICJzZXEuMTA4MDAuMTUiLCJzZXEuMTExNTAuMyIsInNlcS4xMTE5Ni4zMSIsInNlcS4xMTIzNy40OSIsInNlcS4xMTM0OC4xMzIiLCJzZXEuMTE2NDUuOSIsInNlcS4xMzUzNS4yIiwic2VxLjEzOTUwLjkiICAsInNlcS4xNTQ2Ni4zMCIsInNlcS4xNTU2OS4xNSIsInNlcS4xNjc1My40NiIsInNlcS4xNjgyOC44Iiwic2VxLjE4ODc1LjEyNSIsInNlcS4xODg4MC44MSIsInNlcS4yMDE3NS4xNyIsInNlcS4yMjAxLjE3Iiwic2VxLjIyMDQ3LjQ2Iiwic2VxLjI1NzkuMTciLCJzZXEuMjc4OC41NSIgICwic2VxLjI3ODkuMjYiLCJzZXEuMjk1NC41NiIsInNlcS4zMzQ4LjQ5Iiwic2VxLjQxNjAuNDkiLCJzZXEuNDQ5Ni42MCIsInNlcS40NTQzLjY1Iiwic2VxLjQ5MjQuMzIiLCJzZXEuNDkyNS41NCIsInNlcS41MDAyLjc2Iiwic2VxLjU2MzguMjMiLCJzZXEuNjI3My41OCIsCiJzZXEuNjM4My45MCIsInNlcS42NTcwLjEiLCJzZXEuNjYzMS4xNyIsInNlcS44NDc1LjE1Iiwic2VxLjg0NzkuNCIgLCJzZXEuODg0NS4yIiAsInNlcS45MTcyLjY5IikKCnByb3RlaW5fbmFtZXMgPC0gYygiTU1QMTBfMSIsIkNPTDZBM18xIiwiUExPRDMiLCJTRVJQSU5IMSIsIkNPTDZBMV8xIiwiQ09MNkEzXzIiLCJQQ09MQ0UiLCJQNEhBMiIsIlA0SEExIiwiQ0VSVDFfMSIsIkNFUlQxXzIiLCJDT0w5QTFfMSIsIkNPTDJBMV8xIiwiQ09MNkEyIiwiQ09MNkExXzIiLCJDT0wyQTFfMiIsIkNPTDNBMSIsIkNPTDlBMyIsIkNPTDE4QTEiLCJDT0w1QTEiLCJNTVA5IiwiTU1Q
MyIsIk1NUDdfMSIsIk1NUDhfMSIsIkJNUDEiLCJNTVAyIiwiTU1QMTIiLCJDT0wyM0ExIiwiTU1QMSIsIk1NUDEzIiwKIk1NUDE0IiwiQ09MR0FMVDEiLCJQM0gxIiwiVExMMSIsIkNPTDEzQTEiLCJDT0w5QTFfMiIsIk1NUDdfMiIsIk1NUDEwXzIiLCJBREFNVFMzIiwiTU1QOF8yIikKCnN1Yl9hZGF0MiA8LSBhZGF0ICU+JSBzZWxlY3QoJ0NvaG9ydCcsIGFsbF9vZihwcm90ZWluX2xpc3QpKQpzdWJfYWRhdDIgPC0gc3ViX2FkYXQyICU+JSByZW5hbWVfYXQodmFycyhwcm90ZWluX2xpc3QpLCB+IHByb3RlaW5fbmFtZXMpCnN1Yl9hZGF0MgpgYGAKClBlcmZvcm0gdC10ZXN0cyBiZXR3ZWVuIENvbnRyb2wgYW5kIER1cHV5dHJlbiBjb25kaXRpb25zIGZvciBlYWNoIHByb3RlaW4uCmBgYHtyfQpDb2hvcnQgPC0gc3ViX2FkYXQyJENvaG9ydApyZXMgPC0gc3ViX2FkYXQyICU+JSAKICBzZWxlY3RfaWYoaXMubnVtZXJpYykgJT4lCiAgbWFwX2RmKH4gYnJvb206OnRpZHkodC50ZXN0KC4gfiBDb2hvcnQpJHAudmFsdWUpLCAuaWQgPSAnUHJvdGVpbicpCnJlcyAlPiUgcmVuYW1lKCdwLXZhbHVlJyA9ICd4JykKYGBgCgpMb29wIG92ZXIgZGYgYW5kIHZpc3VhbGl6ZSBkYXRhLgpgYGB7cn0KZm9yKGkgaW4gcHJvdGVpbl9uYW1lcykgewogIHByaW50KGdncGxvdChzdWJfYWRhdDIsIGFlcyh4PUNvaG9ydCkpICsgZ2VvbV9ib3hwbG90KGFlc19zdHJpbmcoeT0gaSkpKQp9CmBgYAo=